-
Couldn't load subscription status.
- Fork 15k
[NFC][NVPTX-Tests] Split tcgen05 cta group tests #162597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NFC][NVPTX-Tests] Split tcgen05 cta group tests #162597
Conversation
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function.
|
@llvm/pr-subscribers-backend-nvptx Author: Rajat Bajpai (rajatbajpai) ChangesThis change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. Patch is 56.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162597.diff 4 Files Affected:
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
index 41a0e81b5a6e6..1edb3871d9912 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll
@@ -12,63 +12,104 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
-; CHECK-LABEL: test_tcgen05_alloc
-define void @test_tcgen05_alloc(ptr %addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_alloc(
+define void @test_tcgen05_alloc_cg1(ptr %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
-; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols)
- call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_alloc_cg2(ptr %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
+; CHECK_PTX64_SHARED32-NEXT: ret;
+ call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
ret void
}
-; CHECK-LABEL: test_tcgen05_alloc_shared
-define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared(
+define void @test_tcgen05_alloc_shared_cg1(ptr addrspace(3) %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1;
-; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_alloc_shared_cg2(ptr addrspace(3) %addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
ret void
}
@@ -76,31 +117,50 @@ define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) {
declare void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
declare void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
-; CHECK-LABEL: test_tcgen05_dealloc
-define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) {
-; CHECK_PTX64-LABEL: test_tcgen05_dealloc(
+define void @test_tcgen05_dealloc_cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
-; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
+ ret void
+}
+define void @test_tcgen05_dealloc_cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) {
+; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
+; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
+; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
ret void
}
@@ -108,27 +168,42 @@ define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) {
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
-; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit
-define void @test_tcgen05_relinquish_alloc_permit() {
-; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit(
+define void @test_tcgen05_relinquish_alloc_permit_cg1() {
+; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
-; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
+ ret void
+}
+define void @test_tcgen05_relinquish_alloc_permit_cg2() {
+; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
index 7981feb934c81..2e80c4c935814 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll
@@ -11,57 +11,93 @@ declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)
declare void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr)
declare void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr)
-; CHECK-LABEL: test_tcgen05_commit
-define void @test_tcgen05_commit(ptr %bar_addr) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit(
+define void @test_tcgen05_commit_cg1(ptr %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr)
+ ret void
+}
+
+define void @test_tcgen05_commit_cg2(ptr %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr)
ret void
}
-; CHECK-LABEL: test_tcgen05_commit_shared
-define void @test_tcgen05_commit_shared(ptr addrspace(3) %bar_addr) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit_shared(
+define void @test_tcgen05_commit_shared_cg1(ptr addrspace(3) %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_param_0];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg1_param_0];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg1_param_0];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%r1];
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1];
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr)
+ ret void
+}
+
+define void @test_tcgen05_commit_shared_cg2(ptr addrspace(3) %bar_addr) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg2_param_0];
+; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1];
+; CHECK_PTX64-NEXT: ret;
+;
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg2(
+; CHECK_PTX64_SHARED32: {
+; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
+; CHECK_PTX64_SHARED32-EMPTY:
+; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg2_param_0];
+; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1];
+; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr)
ret void
@@ -72,66 +108,106 @@ declare void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask)
declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask)
declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask)
-; CHECK-LABEL: test_tcgen05_commit_mc
-define void @test_tcgen05_commit_mc(ptr %bar_addr, i16 %cta_mask) {
-; CHECK_PTX64-LABEL: test_tcgen05_commit_mc(
+define void @test_tcgen05_commit_mc_cg1(ptr %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg1(
; CHECK_PTX64: {
; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64-EMPTY:
; CHECK_PTX64-NEXT: // %bb.0:
-; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0];
-; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1];
+; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0];
+; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1];
; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
-; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
; CHECK_PTX64-NEXT: ret;
;
-; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc(
+; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg1(
; CHECK_PTX64_SHARED32: {
; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>;
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
; CHECK_PTX64_SHARED32-EMPTY:
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0];
-; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0];
+; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1];
; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
-; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1;
; CHECK_PTX64_SHARED32-NEXT: ret;
call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %bar_addr, i16 %cta_mask)
+ ret void
+}
+define void @test_tcgen05_commit_mc_cg2(ptr %bar_addr, i16 %cta_mask) {
+; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg2(
+; CHECK_PTX64: {
+; CHECK_PTX64-NEXT: .reg .b16 %rs<2>;
+; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
+; CHECK_PTX64-EMPTY:
+; CHECK_PTX64-NEXT: // %bb.0:
+; CHECK_PTX64-NEXT: ld.param.b64 %...
[truncated]
|
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.